#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
简短的PDF图片提取示例代码
"""

from rag.emm.Boos_rag import extract_images_only

def extract_pdf_images(pdf_path, save_dir="images"):
    """Extract the images of a PDF and print a short summary of the result.

    The extraction itself is delegated to ``extract_images_only``; this
    wrapper prints one line per returned entry that does not carry an
    ``'error'`` key.

    Args:
        pdf_path: Path of the PDF file, passed through to
            ``extract_images_only``.
        save_dir: Directory for the extracted images, passed through as the
            ``save_dir`` keyword argument.

    Returns:
        Whatever ``extract_images_only`` returned (entries with an
        ``'error'`` key included), or an empty list if extraction raised.
    """
    try:
        images = extract_images_only(pdf_path, save_dir=save_dir)
        # Filter inside the ``try`` so that a result that cannot be iterated
        # (e.g. ``None``) is still reported as a failure rather than escaping
        # to the caller.
        extracted = [img for img in images if 'error' not in img]
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and hand back
        # an empty result instead of propagating.
        print(f"提取失败: {e}")
        return []

    # Count only the entries without an 'error' key; counting every entry
    # would report failed items as successfully extracted.
    print(f"成功提取 {len(extracted)} 张图片")
    for img in extracted:
        # ``.get`` keeps a missing metadata key from raising here, which
        # previously discarded an otherwise successful extraction.
        print(
            f"第{img.get('page', '?')}页: {img.get('name', '?')} "
            f"({img.get('actual_width', '?')}x{img.get('actual_height', '?')})"
        )

    return images

# Usage example
if __name__ == "__main__":
    # Run the extraction on a hard-coded PDF path, saving into "extracted_images".
    source_pdf = "rag/emm/Boss 直聘平台使用指南.pdf"
    results = extract_pdf_images(source_pdf, "extracted_images")

    # Print details for the first entry only, and only when the result is
    # non-empty and that entry has no 'error' key.
    if results and 'error' not in results[0]:
        head = results[0]
        print(f"\n第一张图片详情:")
        print(f"页码: {head['page']}")
        print(f"名称: {head['name']}")
        print(f"尺寸: {head['actual_width']} x {head['actual_height']}")
        print(f"文件大小: {head['file_size']} 字节")
        # The saved path is optional, so it is printed only when present.
        if 'saved_path' in head:
            print(f"保存路径: {head['saved_path']}")
